Welcome!
This exploratory data analysis (EDA) uses the UK Road Safety: Traffic Accidents (2005-2017) dataset.
Reference: https://www.kaggle.com/tsiaras/uk-road-safety-accidents-and-vehicles
These files provide detailed road safety data about the circumstances of personal injury road accidents in GB from 1979, the types of vehicles involved and the consequential casualties. The statistics relate only to personal injury accidents on public roads that are reported to the police, and subsequently recorded, using the STATS19 accident reporting form.
## [1] "Accident_Index"
## [2] "X1st_Road_Class"
## [3] "X1st_Road_Number"
## [4] "X2nd_Road_Class"
## [5] "X2nd_Road_Number"
## [6] "Accident_Severity"
## [7] "Carriageway_Hazards"
## [8] "Date"
## [9] "Day_of_Week"
## [10] "Did_Police_Officer_Attend_Scene_of_Accident"
## [11] "Junction_Control"
## [12] "Junction_Detail"
## [13] "Latitude"
## [14] "Light_Conditions"
## [15] "Local_Authority_.District."
## [16] "Local_Authority_.Highway."
## [17] "Location_Easting_OSGR"
## [18] "Location_Northing_OSGR"
## [19] "Longitude"
## [20] "LSOA_of_Accident_Location"
## [21] "Number_of_Casualties"
## [22] "Number_of_Vehicles"
## [23] "Pedestrian_Crossing.Human_Control"
## [24] "Pedestrian_Crossing.Physical_Facilities"
## [25] "Police_Force"
## [26] "Road_Surface_Conditions"
## [27] "Road_Type"
## [28] "Special_Conditions_at_Site"
## [29] "Speed_limit"
## [30] "Time"
## [31] "Urban_or_Rural_Area"
## [32] "Weather_Conditions"
## [33] "Year"
## [34] "InScotland"
## Rows: 2,047,256
## Columns: 34
## $ Accident_Index <chr> "200501BS00001", "20050...
## $ X1st_Road_Class <chr> "A", "B", "C", "A", "Un...
## $ X1st_Road_Number <int> 3218, 450, 0, 3220, 0, ...
## $ X2nd_Road_Class <chr> NA, "C", NA, NA, NA, NA...
## $ X2nd_Road_Number <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Accident_Severity <chr> "Serious", "Slight", "S...
## $ Carriageway_Hazards <chr> "None", "None", "None",...
## $ Date <chr> "2005-01-04", "2005-01-...
## $ Day_of_Week <chr> "Tuesday", "Wednesday",...
## $ Did_Police_Officer_Attend_Scene_of_Accident <int> 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Junction_Control <chr> "Data missing or out of...
## $ Junction_Detail <chr> "Not at junction or wit...
## $ Latitude <dbl> 51.48910, 51.52007, 51....
## $ Light_Conditions <chr> "Daylight", "Darkness -...
## $ Local_Authority_.District. <chr> "Kensington and Chelsea...
## $ Local_Authority_.Highway. <chr> "Kensington and Chelsea...
## $ Location_Easting_OSGR <int> 525680, 524170, 524520,...
## $ Location_Northing_OSGR <int> 178240, 181650, 182240,...
## $ Longitude <dbl> -0.191170, -0.211708, -...
## $ LSOA_of_Accident_Location <chr> "E01002849", "E01002909...
## $ Number_of_Casualties <int> 1, 1, 1, 1, 1, 1, 1, 2,...
## $ Number_of_Vehicles <int> 1, 1, 2, 1, 1, 2, 2, 1,...
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Pedestrian_Crossing.Physical_Facilities <int> 1, 5, 0, 0, 0, 0, 0, 0,...
## $ Police_Force <chr> "Metropolitan Police", ...
## $ Road_Surface_Conditions <chr> "Wet or damp", "Dry", "...
## $ Road_Type <chr> "Single carriageway", "...
## $ Special_Conditions_at_Site <chr> "None", "None", "None",...
## $ Speed_limit <int> 30, 30, 30, 30, 30, 30,...
## $ Time <chr> "17:42", "17:36", "00:1...
## $ Urban_or_Rural_Area <chr> "Urban", "Urban", "Urba...
## $ Weather_Conditions <chr> "Raining no high winds"...
## $ Year <int> 2005, 2005, 2005, 2005,...
## $ InScotland <chr> "No", "No", "No", "No",...
# Columns that hold categorical codes or labels and are stored as factors.
factor_columns <- c(
  "X1st_Road_Class",
  "X1st_Road_Number",
  "X2nd_Road_Class",
  "X2nd_Road_Number",
  "Accident_Severity",
  "Carriageway_Hazards",
  "Day_of_Week",
  "Did_Police_Officer_Attend_Scene_of_Accident",
  "Junction_Control",
  "Junction_Detail",
  "Light_Conditions",
  "Local_Authority_.District.",
  "Local_Authority_.Highway.",
  "Location_Easting_OSGR",
  "Location_Northing_OSGR",
  "LSOA_of_Accident_Location",
  "Police_Force",
  "Road_Surface_Conditions",
  "Road_Type",
  "Special_Conditions_at_Site",
  "Speed_limit",
  "Urban_or_Rural_Area",
  "Weather_Conditions",
  "InScotland",
  "Year"
)

# Convert every listed column in place; all other columns are left untouched.
traffic[factor_columns] <- lapply(traffic[factor_columns], as.factor)
## 'data.frame': 2047256 obs. of 34 variables:
## $ Accident_Index : chr "200501BS00001" "200501BS00002" "200501BS00003" "200501BS00004" ...
## $ X1st_Road_Class : Factor w/ 6 levels "A","A(M)","B",..: 1 3 4 1 6 6 4 1 1 3 ...
## $ X1st_Road_Number : Factor w/ 7160 levels "0","1","2","3",..: 2915 451 1 2917 1 1 1 316 2909 451 ...
## $ X2nd_Road_Class : Factor w/ 6 levels "A","A(M)","B",..: NA 4 NA NA NA NA 6 NA 3 4 ...
## $ X2nd_Road_Number : Factor w/ 7512 levels "0","1","2","3",..: 1 1 1 1 1 1 1 1 305 1 ...
## $ Accident_Severity : Factor w/ 3 levels "Fatal","Serious",..: 2 3 3 3 3 3 3 3 3 3 ...
## $ Carriageway_Hazards : Factor w/ 7 levels "Any animal in carriageway (except ridden horse)",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Date : Date, format: "2005-01-04" "2005-01-05" ...
## $ Day_of_Week : Factor w/ 7 levels "Friday","Monday",..: 6 7 5 1 2 6 5 1 3 3 ...
## $ Did_Police_Officer_Attend_Scene_of_Accident: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ Junction_Control : Factor w/ 6 levels "Authorised person",..: 3 2 3 3 3 3 4 3 2 4 ...
## $ Junction_Detail : Factor w/ 10 levels "Crossroads","Data missing or out of range",..: 5 1 5 5 5 5 10 5 1 10 ...
## $ Latitude : num 51.5 51.5 51.5 51.5 51.5 ...
## $ Light_Conditions : Factor w/ 6 levels "Darkness - lighting unknown",..: 6 2 2 6 1 6 2 6 2 6 ...
## $ Local_Authority_.District. : Factor w/ 416 levels "Aberdeen City",..: 183 183 183 183 183 183 183 183 183 183 ...
## $ Local_Authority_.Highway. : Factor w/ 207 levels "Aberdeen City",..: 90 90 90 90 90 90 90 90 90 90 ...
## $ Location_Easting_OSGR : Factor w/ 279456 levels "64950","64980",..: 223024 221891 222168 224082 225049 222341 221931 223193 224470 222187 ...
## $ Location_Northing_OSGR : Factor w/ 344815 levels "10290","10304",..: 82209 85082 85526 81630 82846 84676 84381 83428 81717 84362 ...
## $ Longitude : num -0.191 -0.212 -0.206 -0.174 -0.157 ...
## $ LSOA_of_Accident_Location : Factor w/ 35565 levels "","E01000001",..: 2848 2908 2856 2839 2862 2831 2874 2888 2899 2874 ...
## $ Number_of_Casualties : int 1 1 1 1 1 1 1 2 2 5 ...
## $ Number_of_Vehicles : int 1 1 2 1 1 2 2 1 2 2 ...
## $ Pedestrian_Crossing.Human_Control : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Pedestrian_Crossing.Physical_Facilities : int 1 5 0 0 0 0 0 0 5 8 ...
## $ Police_Force : Factor w/ 51 levels "Avon and Somerset",..: 30 30 30 30 30 30 30 30 30 30 ...
## $ Road_Surface_Conditions : Factor w/ 6 levels "Data missing or out of range",..: 6 2 2 2 6 6 2 2 2 2 ...
## $ Road_Type : Factor w/ 7 levels "Data missing or out of range",..: 5 2 5 5 5 5 5 2 5 5 ...
## $ Special_Conditions_at_Site : Factor w/ 9 levels "Auto signal part defective",..: 5 5 5 5 5 6 5 5 5 5 ...
## $ Speed_limit : Factor w/ 9 levels "0","10","15",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ Time :Formal class 'Period' [package "lubridate"] with 6 slots
## .. ..@ .Data : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ year : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ month : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ day : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ hour : num 17 17 0 10 21 12 20 17 22 16 ...
## .. ..@ minute: num 42 36 15 35 13 40 40 35 43 0 ...
## $ Urban_or_Rural_Area : Factor w/ 3 levels "Rural","Unallocated",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Weather_Conditions : Factor w/ 10 levels "Data missing or out of range",..: 7 3 3 3 3 7 3 3 3 3 ...
## $ Year : Factor w/ 13 levels "2005","2006",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ InScotland : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
Missing (NA) values per column
## Accident_Index
## 0
## X1st_Road_Class
## 0
## X1st_Road_Number
## 2
## X2nd_Road_Class
## 844272
## X2nd_Road_Number
## 17593
## Accident_Severity
## 0
## Carriageway_Hazards
## 0
## Date
## 0
## Day_of_Week
## 0
## Did_Police_Officer_Attend_Scene_of_Accident
## 278
## Junction_Control
## 0
## Junction_Detail
## 0
## Latitude
## 174
## Light_Conditions
## 0
## Local_Authority_.District.
## 0
## Local_Authority_.Highway.
## 0
## Location_Easting_OSGR
## 164
## Location_Northing_OSGR
## 164
## Longitude
## 175
## LSOA_of_Accident_Location
## 0
## Number_of_Casualties
## 0
## Number_of_Vehicles
## 0
## Pedestrian_Crossing.Human_Control
## 2920
## Pedestrian_Crossing.Physical_Facilities
## 3560
## Police_Force
## 0
## Road_Surface_Conditions
## 0
## Road_Type
## 0
## Special_Conditions_at_Site
## 0
## Speed_limit
## 37
## Time
## 156
## Urban_or_Rural_Area
## 0
## Weather_Conditions
## 0
## Year
## 0
## InScotland
## 53
## Rows: 2,042,570
## Columns: 28
## $ Accident_Index <chr> "200501BS00001", "20050...
## $ Accident_Severity <fct> Serious, Slight, Slight...
## $ Carriageway_Hazards <fct> None, None, None, None,...
## $ Date <date> 2005-01-04, 2005-01-05...
## $ Day_of_Week <fct> Tuesday, Wednesday, Thu...
## $ Did_Police_Officer_Attend_Scene_of_Accident <fct> 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Junction_Control <fct> Data missing or out of ...
## $ Junction_Detail <fct> Not at junction or with...
## $ Latitude <dbl> 51.48910, 51.52007, 51....
## $ Light_Conditions <fct> Daylight, Darkness - li...
## $ Local_Authority_.District. <fct> Kensington and Chelsea,...
## $ Local_Authority_.Highway. <fct> Kensington and Chelsea,...
## $ Longitude <dbl> -0.191170, -0.211708, -...
## $ LSOA_of_Accident_Location <fct> E01002849, E01002909, E...
## $ Number_of_Casualties <int> 1, 1, 1, 1, 1, 1, 1, 2,...
## $ Number_of_Vehicles <int> 1, 1, 2, 1, 1, 2, 2, 1,...
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Pedestrian_Crossing.Physical_Facilities <int> 1, 5, 0, 0, 0, 0, 0, 0,...
## $ Police_Force <fct> Metropolitan Police, Me...
## $ Road_Surface_Conditions <fct> Wet or damp, Dry, Dry, ...
## $ Road_Type <fct> Single carriageway, Dua...
## $ Special_Conditions_at_Site <fct> None, None, None, None,...
## $ Speed_limit <fct> 30, 30, 30, 30, 30, 30,...
## $ Time <Period> 17H 42M 0S, 17H 36M ...
## $ Urban_or_Rural_Area <fct> Urban, Urban, Urban, Ur...
## $ Weather_Conditions <fct> Raining no high winds, ...
## $ Year <fct> 2005, 2005, 2005, 2005,...
## $ InScotland <fct> No, No, No, No, No, No,...
# Accidents per year, stacked by severity.
traffic %>%
  group_by(Accident_Severity, Year) %>%
  # "drop_last" is what summarise() does by default; spelling it out keeps the
  # result identical while silencing the "regrouping output" message.
  dplyr::summarise(n_Year = n(), .groups = "drop_last") %>%
  arrange(desc(n_Year)) %>%
  ggplot(aes(x = Year, y = n_Year, fill = Accident_Severity)) +
  # geom_col() always uses the identity stat, so it takes no `stat` argument.
  geom_col(position = "stack") +
  theme_bw() +
  # "Set3" provides at most 12 colours; asking for 15 only raised a warning
  # and returned these same 12.
  scale_fill_manual(values = rev(brewer.pal(12, "Set3"))) +
  labs(
    title = "Traffic per Year X Accident Severity",
    subtitle = "(2005-17)",
    x = "Year",
    y = "Freq"
  )

# Accidents per day of week, stacked by severity, busiest day first.
traffic %>%
  group_by(Accident_Severity, Day_of_Week) %>%
  dplyr::summarise(n_Day = n(), .groups = "drop_last") %>%
  arrange(desc(n_Day)) %>%
  ggplot(aes(
    x = reorder(Day_of_Week, -n_Day),
    y = n_Day,
    fill = Accident_Severity
  )) +
  geom_col(position = "stack") +
  theme_bw() +
  scale_fill_manual(values = rev(brewer.pal(7, "Set3"))) +
  labs(
    title = "Traffic per Day X Accident Severity",
    subtitle = "(2005-17)",
    x = "Day of Week",
    y = "Freq"
  )
# Note: with `.groups` set and no `stat` argument passed to geom_col(), these
# chunks no longer print the "`summarise()` regrouping output" message or the
# "Ignoring unknown parameters: stat" warning that were recorded here.
#' Count accidents per severity, one further factor, and year.
#'
#' @param data Accident-level data frame with `Accident_Severity` and `Year`.
#' @param var Unquoted name of the additional grouping column.
#' @param count_name String used as the name of the count column.
#' @return A tibble of counts sorted from largest to smallest. It keeps the
#'   grouping that `summarise()` leaves by default (last grouping level
#'   dropped).
count_by_severity_year <- function(data, var, count_name) {
  data %>%
    group_by(Accident_Severity, {{ var }}, Year) %>%
    # "drop_last" is the default behaviour; naming it avoids the regrouping
    # message without changing the result.
    dplyr::summarise("{count_name}" := n(), .groups = "drop_last") %>%
    arrange(desc(.data[[count_name]]))
}

#' Draw a 100% stacked bar chart of one factor within each accident severity.
#'
#' @param data Count table produced by `count_by_severity_year()`.
#' @param fill_var Unquoted name of the column mapped to the fill colour.
#' @param count_var Unquoted name of the count column mapped to bar height.
#' @param title Plot title.
#' @param n_colours Number of "Set3" colours to request (at most 12).
#' @return A ggplot object.
plot_severity_share <- function(data, fill_var, count_var, title, n_colours) {
  ggplot(
    data,
    aes(fill = {{ fill_var }}, y = {{ count_var }}, x = Accident_Severity)
  ) +
    # position = "fill" rescales each bar to 1, so yearly rows are pooled
    # into a single share per severity.
    geom_col(position = "fill") +
    theme_bw() +
    labs(
      title = title,
      subtitle = "(2005-17)",
      x = "Accident Severity",
      y = "Percentage"
    ) +
    scale_fill_manual(values = rev(brewer.pal(n_colours, "Set3")))
}

# Count tables ----
plot_Junction_Control <- traffic %>%
  count_by_severity_year(Junction_Control, "n_Junction_Control")

plot_Junction_Detail <- traffic %>%
  count_by_severity_year(Junction_Detail, "n_Junction_Detail")

plot_Urban_or_Rural_Area <- traffic %>%
  count_by_severity_year(Urban_or_Rural_Area, "n_Urban_or_Rural_Area")

plot_Road_Type <- traffic %>%
  count_by_severity_year(Road_Type, "n_Road_Type")

plot_Road_Surface_Conditions <- traffic %>%
  count_by_severity_year(Road_Surface_Conditions, "n_Road_Surface_Conditions")

plot_Speed_limit <- traffic %>%
  count_by_severity_year(Speed_limit, "n_Speed_limit")

plot_Weather_Conditions <- traffic %>%
  count_by_severity_year(Weather_Conditions, "n_Weather_Conditions")

# This table was plotted below but never built in the original chunks.
plot_Light_Conditions <- traffic %>%
  count_by_severity_year(Light_Conditions, "n_Light_Conditions")

# Plots ----
plot_Junction_Control %>%
  filter(Junction_Control != "Data missing or out of range") %>%
  plot_severity_share(
    Junction_Control, n_Junction_Control,
    title = "Accident Severity X Junction Control",
    n_colours = 5
  )

plot_Junction_Detail %>%
  plot_severity_share(
    Junction_Detail, n_Junction_Detail,
    title = "Accident Severity X Junction Detail",
    n_colours = 10
  )

plot_Urban_or_Rural_Area %>%
  plot_severity_share(
    Urban_or_Rural_Area, n_Urban_or_Rural_Area,
    title = "Accident Severity X Urban/Rural Area",
    n_colours = 10
  )

plot_Road_Type %>%
  plot_severity_share(
    Road_Type, n_Road_Type,
    title = "Accident Severity X Road Type",
    n_colours = 10
  )

# Title corrected from "Road Survey Condition": the column is road surface.
plot_Road_Surface_Conditions %>%
  plot_severity_share(
    Road_Surface_Conditions, n_Road_Surface_Conditions,
    title = "Accident Severity X Road Surface Condition",
    n_colours = 10
  )

plot_Speed_limit %>%
  plot_severity_share(
    Speed_limit, n_Speed_limit,
    title = "Accident Severity X Speed Limit",
    n_colours = 10
  )

# "Set3" has only 12 colours; requesting 15 warned and returned the same 12.
plot_Weather_Conditions %>%
  plot_severity_share(
    Weather_Conditions, n_Weather_Conditions,
    title = "Accident Severity X Weather Condition",
    n_colours = 12
  )

plot_Light_Conditions %>%
  plot_severity_share(
    Light_Conditions, n_Light_Conditions,
    title = "Accident Severity X Light Condition",
    n_colours = 12
  )

# Trying to use leaflet
# Number of accidents per severity at each distinct (Longitude, Latitude)
# pair, largest counts first. Despite its name, this table is keyed by
# location, not by year; the name is kept for compatibility.
n_Year <- traffic %>%
  group_by(Accident_Severity, Longitude, Latitude) %>%
  # "drop_last" matches summarise()'s default regrouping and silences the
  # "regrouping output" message.
  dplyr::summarise(n_traffic = n(), .groups = "drop_last") %>%
  arrange(desc(n_traffic))
# Interactive base map centred on Great Britain. A leaflet widget is not a
# data frame, so it must not be piped into ggplot(); the two maps are separate
# expressions.
leaflet() %>%
  addTiles() %>%
  setView(lng = -1.9, lat = 55, zoom = 5) %>%
  addProviderTiles("Esri.WorldGrayCanvas")

# Static bubble map built with ggplot2.
# NOTE(review): the original polygon layer traced the rows of `traffic`
# itself, which does not draw an outline. The UK outline from map_data() is
# assumed to be the intended backdrop -- confirm.
uk_outline <- map_data("world") %>%
  filter(region == "UK")

# The last ten rows once sorted by severity level; drawn in red and labelled.
highlighted_accidents <- traffic %>%
  arrange(Accident_Severity) %>%
  tail(10)

ggplot() +
  geom_polygon(
    data = uk_outline,
    aes(x = long, y = lat, group = group),
    fill = "grey",
    alpha = 0.3
  ) +
  # `n_traffic` exists only in the per-location summary `n_Year`, not in
  # `traffic`, so this layer has to read from `n_Year`.
  geom_point(
    data = n_Year,
    aes(
      x = Longitude,
      y = Latitude,
      size = n_traffic,
      alpha = Accident_Severity
    )
  ) +
  geom_text_repel(
    data = highlighted_accidents,
    aes(x = Longitude, y = Latitude, label = Accident_Severity),
    size = 5
  ) +
  geom_point(
    data = highlighted_accidents,
    aes(x = Longitude, y = Latitude),
    color = "red",
    size = 3
  ) +
  theme_void() +
  ylim(50, 59) +
  coord_map() +
  theme(legend.position = "none")
# Five district / police-force pairs with the most fatal accidents.
# Wrapped in local() so the intermediate tables do not linger in the
# workspace; the final value is still printed.
local({
  fatal_accidents <- filter(traffic, Accident_Severity == "Fatal")
  fatal_by_area <- group_by(
    fatal_accidents,
    Local_Authority_.District.,
    Police_Force
  )
  fatal_counts <- dplyr::count(fatal_by_area, sort = TRUE)
  head(fatal_counts, 5)
})